package com.cheng.spider.extract.url;

import com.cheng.spider.constant.MimeType;
import com.cheng.spider.extract.ExtractRule;
import com.cheng.spider.extract.HtmlDataResolver;
import com.cheng.spider.extract.Result;
import com.cheng.spider.extract.SaveRule;
import org.jsoup.nodes.Document;

import java.util.List;

/**
 * Base resolver that pulls a list of URLs out of a parsed HTML document and
 * hands each URL to the subclass for further handling.
 *
 * <p>Subclasses supply the two hooks: {@link #extractUrlList(Document)} to
 * collect the URLs and {@link #resolveUrl(String)} to process a single URL.
 * The extract rule stores the collected list in a {@link Result} under the
 * {@code "urlList"} key; the save rule reads that same entry back.
 */
public abstract class UrlExtractor extends HtmlDataResolver {

    /** Key under which the extracted URL list is stored in the {@link Result}. */
    private static final String URL_LIST_KEY = "urlList";

    /**
     * Creates the extraction rule for this resolver.
     *
     * <p>NOTE(review): presumably called by {@link HtmlDataResolver} during
     * setup; the caller is not visible from this file.
     *
     * @return a rule that wraps the output of {@link #extractUrlList(Document)}
     *         in a new {@link Result} under the {@code "urlList"} key
     */
    protected ExtractRule initExtractorRule() {
        return new ExtractRule() {
            @Override
            public Result extract(Document document) {
                Result result = new Result();
                result.put(URL_LIST_KEY, extractUrlList(document));
                return result;
            }
        };
    }

    /**
     * Creates the save rule for this resolver.
     *
     * <p>The rule ignores the supplied {@link MimeType} and calls
     * {@link #resolveUrl(String)} once per entry of the {@code "urlList"}
     * value, in iteration order. If that value is {@code null} (missing entry,
     * or {@link #extractUrlList(Document)} returned {@code null}) nothing is
     * resolved.
     *
     * @return a rule that forwards every extracted URL to
     *         {@link #resolveUrl(String)}
     */
    protected SaveRule initSaveRule() {
        return new SaveRule() {
            @Override
            public void save(Result result, MimeType mimeType) {
                // The only writer of this key in this class is the extract rule
                // above, which stores the List<String> from extractUrlList.
                @SuppressWarnings("unchecked")
                List<String> urls = (List<String>) result.get(URL_LIST_KEY);
                if (urls == null) {
                    // Nothing was extracted; avoid a NullPointerException.
                    return;
                }
                for (String url : urls) {
                    resolveUrl(url);
                }
            }
        };
    }

    /**
     * Collects the URLs of interest from the given document.
     *
     * @param document the parsed HTML document handed to the extract rule
     * @return the URLs found; a {@code null} return is treated by the save
     *         rule as "no URLs"
     */
    protected abstract List<String> extractUrlList(Document document);

    /**
     * Handles a single URL produced by {@link #extractUrlList(Document)}.
     *
     * @param url one element of the extracted list, passed through unchanged
     */
    protected abstract void resolveUrl(String url);

}
